1… 10
11.a Each movie that is kept should appear in the dataset at least 18 times: one record for each weekend day (Friday, Saturday, and Sunday), from the opening weekend through at least 6 weekends. The movies that were dropped were not in theaters for more than 6 weekends.
11.b
# Keep only the films that are not flagged as dropped (dropped == 1).
# Note: filter() also removes rows where the comparison evaluates to NA.
films_used <- filter(films, dropped != 1)
11.c
# Goal: recover the calendar date that corresponds to 0 in the numeric
# `date` column, using one film whose calendar date is known.

# Anchor: the day 12 Rounds came in.
round_12_date <- as.Date("2009-03-27", format = "%Y-%m-%d")

# Number recorded under the "date" column for 12 Rounds.
days_before <- 17984

# Step back that many days from the anchor to land on day zero.
# NOTE(review): if `date` is a Stata daily date, day zero would be
# 1960-01-01, i.e. one day later than the value derived here -- confirm
# which record (Friday vs. Saturday) the 17984 was read from.
reference_date <- round_12_date + (-days_before)

# Show the derived reference date.
print(reference_date)
## [1] "1959-12-31"
11.d
# Turn the numeric `date` offset into a calendar date (`movie_date`) by
# adding it to the reference date derived above.
films_used_d <- films_used |>
  mutate(movie_date = as.Date(reference_date + date)) |>
  # Move the identifying columns to the front, so that movie_date becomes
  # the 4th column; all remaining columns keep their relative order.
  relocate(title, production_budget, release_yr, movie_date, sat_date)

# Preview the title next to its computed calendar date.
films_used_d |>
  select(title, movie_date)
11.e
# Build the calendar date of each record's Saturday (`sat_day`) from the
# numeric `sat_date`, then flag which weekend day each record falls on by
# comparing `movie_date` with that Saturday:
#   sat_dummy = 1 when the record is the Saturday itself,
#   fri_dummy = 1 when it is the day before,
#   sun_dummy = 1 when it is the day after.
films_used_date <- films_used_d |>
  mutate(
    sat_day = as.Date(reference_date + sat_date),
    sat_dummy = ifelse(movie_date == sat_day, 1, 0),
    fri_dummy = ifelse(movie_date == sat_day - 1, 1, 0),
    sun_dummy = ifelse(movie_date == sat_day + 1, 1, 0)
  ) |>
  # Cosmetic only: put the identifiers, dates and new dummies first.
  select(
    title, production_budget, release_yr, movie_date,
    sat_day, sat_dummy, fri_dummy, sun_dummy,
    everything()
  )

# Preview the dates alongside the three day-of-weekend dummies.
films_used_date |>
  select(title, movie_date, sat_day, fri_dummy, sat_dummy, sun_dummy)
11.f
# Number each film's weekends and expand that number into dummy columns.
#
# Within each title, `week` is the rank of the record's Saturday among the
# distinct Saturdays observed for that film (1 = earliest). Because it is a
# rank of the Saturdays present in the data, a missing weekend would not
# leave a gap in the numbering.
films_used_date <- films_used_date |>
  arrange(title, sat_day) |>
  group_by(title) |>
  # Assign numeric labels to the unique values of sat_day within each title
  mutate(week = as.integer(factor(sat_day))) |>
  # Drop the per-title grouping so that later steps (dummy creation,
  # summaries, model predictions) work on the whole table rather than
  # silently inheriting the grouping.
  ungroup()

# Create one indicator column per week number (week_1, week_2, ...) with
# fastDummies.
films_used_date <- dummy_cols(films_used_date, select_columns = "week")

# Preview the first two week dummies.
films_used_date[, c("title", "movie_date", "week_1", "week_2")]
11.g
# Add one indicator column per release year (release_yr_<year>) using
# fastDummies; the result is stored as `film`.
film <- films_used_date |>
  dummy_cols(select_columns = "release_yr")

# Preview the release year next to two of its dummies.
film |>
  select(title, release_yr, release_yr_2009, release_yr_2010)
11.h
# Plot the average number of tickets by week and by day of the weekend.
# The three day dummies are collapsed into a single label first; a record
# for which none of the dummies equals 1 gets an NA label.
film |>
  mutate(
    weekend = case_when(
      fri_dummy == 1 ~ "Friday",
      sat_dummy == 1 ~ "Saturday",
      sun_dummy == 1 ~ "Sunday"
    )
  ) |>
  # One average per (week, day-of-weekend) combination
  group_by(week, weekend) |>
  summarise(avg_tickets = mean(tickets)) |>
  ggplot(aes(x = week, y = avg_tickets, color = factor(weekend))) +
  geom_point() +
  geom_line() +
  # Fixed colour per day so the legend is stable across runs
  scale_color_manual(
    values = c(
      "Friday" = "red",
      "Saturday" = "#4682B4",
      "Sunday" = "#8B008B"
    )
  ) +
  labs(
    x = "Week",
    y = "Tickets",
    color = "Weekend"
  ) +
  # Ask for roughly six tick marks on each axis
  scale_x_continuous(breaks = scales::pretty_breaks(n = 6)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 6)) +
  theme_bw()
## `summarise()` has grouped output by 'week'. You can override using the
## `.groups` argument.
## 12 NOT NEEDED
# Regress tickets on the day-of-weekend, week, release-year and holiday
# dummies, then store the fitted value and the residual ("abnormal
# viewership") for every record.

# Column names containing "hh" (taken here to be the holiday indicators)
holiday <- colnames(film) |>
  str_subset("hh")
# Join them with " + " so they can be pasted into a formula
holiday_dummy <- holiday |>
  str_c(collapse = " + ")

# Day-of-weekend dummies: every column whose name contains "dummy"
weekend_dummy <- colnames(film) |>
  str_subset("dummy") |>
  str_c(collapse = " + ")

# Week dummies: every column whose name contains "week_"
week_dummy <- colnames(film) |>
  str_subset("week_") |>
  str_c(collapse = " + ")

# Release-year dummies: every column whose name contains "release_yr_"
year_dummy <- colnames(film) |>
  str_subset("release_yr_") |>
  str_c(collapse = " + ")

# Assemble the formula text.
# NOTE(review): each dummy set appears to include all of its levels while
# the model also has an intercept, so the regressors look collinear and
# lm() would report NA for the redundant coefficients -- confirm this is
# acceptable.
mod1 <- glue("tickets ~ {weekend_dummy} + {week_dummy} + {year_dummy} + {holiday_dummy}")

# Fit the linear model
reg_mod1 <- lm(as.formula(mod1), data = film)

# Predicted tickets and the gap between actual and predicted tickets
film <- film |>
  mutate(
    pred_tickets = predict(reg_mod1, film),
    abnormal_viewership = tickets - pred_tickets
  )

# Preview actual, predicted and abnormal values with the Saturday date
film |>
  select(tickets, pred_tickets, abnormal_viewership, sat_day)
# Load the weather data from the Stata file.
weather <- read_dta("data/weather_collapsed_all.dta")

# Keep a copy of the untouched column names, then prefix every column with
# "www_" so weather variables are easy to pick out after merging.
original_cols <- names(weather)
names(weather) <- paste0("www_", original_cols)

# Show the renamed table.
weather
# Attach the weather columns to the film records.
# left_join() keeps every row of `film`; records whose movie_date has no
# matching www_sat_date are retained with NA in the www_ columns (they are
# not filtered out).
# NOTE(review): the key pairs each record's own date (movie_date) with the
# weather table's Saturday date, and movie_date is a Date -- confirm that
# www_sat_date has a compatible type and that this is the intended key.
weather_film <- left_join(
  film,
  weather,
  by = c("movie_date" = "www_sat_date")
)

# Show only the weather columns of the merged table.
select(weather_film, contains("www"))